from pyspark.ml import Pipeline
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionWithSGD, LogisticRegressionModel
from pyspark.ml.feature import IDF, HashingTF, Tokenizer, Tokenizer, StopWordsRemover, RegexTokenizer, CountVectorizer, StringIndexer
from pyspark.sql import Row
from pyspark.sql.functions import UserDefinedFunction, col, isnull, isnan, when, count
from pyspark.sql.types import *
from pyspark.ml.clustering import LDA
# Load the tab-separated Yelp review dump from blob storage. The first line is
# treated as a header, column types are inferred, and rows that fail to parse
# are dropped (DROPMALFORMED).
yelp_review = (
    spark.read
    .options(sep='\t', header=True, mode="DROPMALFORMED", inferSchema=True)
    .csv('wasb://main@yelpproject.blob.core.windows.net/yelp_review_tab.csv')
)
# Persist the reviews as a table, replacing any previous copy.
yelp_review.write.mode("overwrite").saveAsTable("yelp_review")
yelp_review.printSchema()
root |-- _c0: string (nullable = true) |-- review_id: string (nullable = true) |-- user_id: string (nullable = true) |-- business_id: string (nullable = true) |-- stars: integer (nullable = true) |-- date: timestamp (nullable = true) |-- text: string (nullable = true) |-- useful: integer (nullable = true) |-- funny: integer (nullable = true) |-- cool: integer (nullable = true)
# Load the comma-separated Yelp business listing from blob storage. The first
# line is treated as a header, column types are inferred, and rows that fail
# to parse are dropped (DROPMALFORMED).
yelp_business = (
    spark.read
    .options(sep=',', header=True, mode="DROPMALFORMED", inferSchema=True)
    .csv('wasb://main@yelpproject.blob.core.windows.net/yelp_business.csv')
)
# Persist the businesses as a table, replacing any previous copy.
yelp_business.write.mode("overwrite").saveAsTable("yelp_business")
yelp_business.printSchema()
root |-- business_id: string (nullable = true) |-- name: string (nullable = true) |-- neighborhood: string (nullable = true) |-- address: string (nullable = true) |-- city: string (nullable = true) |-- state: string (nullable = true) |-- postal_code: string (nullable = true) |-- latitude: string (nullable = true) |-- longitude: string (nullable = true) |-- stars: string (nullable = true) |-- review_count: string (nullable = true) |-- is_open: double (nullable = true) |-- categories: string (nullable = true)
%%sql
SHOW TABLES
from pyspark.ml import Pipeline
from pyspark.mllib.classification import LogisticRegressionWithLBFGS, LogisticRegressionWithSGD, LogisticRegressionModel
from pyspark.ml.feature import IDF, HashingTF, Tokenizer, Tokenizer, StopWordsRemover, RegexTokenizer, CountVectorizer, StringIndexer
from pyspark.sql import Row
from pyspark.sql.functions import UserDefinedFunction, col, isnull, isnan, when, count
from pyspark.sql.types import *
from pyspark.ml.clustering import LDA
# Load the tab-separated Yelp review dump from blob storage. The first line is
# treated as a header, column types are inferred, and rows that fail to parse
# are dropped (DROPMALFORMED).
yelp_review = (
    spark.read
    .options(sep='\t', header=True, mode="DROPMALFORMED", inferSchema=True)
    .csv('wasb://main@yelpproject.blob.core.windows.net/yelp_review_tab.csv')
)
# Persist the reviews as a table, replacing any previous copy.
yelp_review.write.mode("overwrite").saveAsTable("yelp_review")
yelp_review.printSchema()
root |-- _c0: string (nullable = true) |-- review_id: string (nullable = true) |-- user_id: string (nullable = true) |-- business_id: string (nullable = true) |-- stars: integer (nullable = true) |-- date: timestamp (nullable = true) |-- text: string (nullable = true) |-- useful: integer (nullable = true) |-- funny: integer (nullable = true) |-- cool: integer (nullable = true)
# Load the comma-separated Yelp business listing from blob storage. The first
# line is treated as a header, column types are inferred, and rows that fail
# to parse are dropped (DROPMALFORMED).
yelp_business = (
    spark.read
    .options(sep=',', header=True, mode="DROPMALFORMED", inferSchema=True)
    .csv('wasb://main@yelpproject.blob.core.windows.net/yelp_business.csv')
)
# Persist the businesses as a table, replacing any previous copy.
yelp_business.write.mode("overwrite").saveAsTable("yelp_business")
yelp_business.printSchema()
root |-- business_id: string (nullable = true) |-- name: string (nullable = true) |-- neighborhood: string (nullable = true) |-- address: string (nullable = true) |-- city: string (nullable = true) |-- state: string (nullable = true) |-- postal_code: string (nullable = true) |-- latitude: string (nullable = true) |-- longitude: string (nullable = true) |-- stars: string (nullable = true) |-- review_count: string (nullable = true) |-- is_open: double (nullable = true) |-- categories: string (nullable = true)
%%sql
SHOW TABLES
| database | tableName | isTemporary | |
|---|---|---|---|
| 0 | default | cities_df | False |
| 1 | default | hivesampletable | False |
| 2 | default | yelp_business | False |
| 3 | default | yelp_review | False |
# How many businesses are in the dataset?
yelp_business.count()
127210
# Are there any null values in yelp_business?
# For every column, count the rows where that column is null and show the
# per-column totals as a single row.
null_counts = [
    count(when(col(column_name).isNull(), column_name)).alias(column_name)
    for column_name in yelp_business.columns
]
yelp_business.select(null_counts).show()
+-----------+----+------------+-------+----+-----+-----------+--------+---------+-----+------------+-------+----------+ |business_id|name|neighborhood|address|city|state|postal_code|latitude|longitude|stars|review_count|is_open|categories| +-----------+----+------------+-------+----+-----+-----------+--------+---------+-----+------------+-------+----------+ | 0| 0| 75939| 0| 1| 0| 590| 1| 1| 0| 0| 0| 0| +-----------+----+------------+-------+----+-----+-----------+--------+---------+-----+------------+-------+----------+
# Subset all restaurants: keep businesses whose category string mentions
# Food, Restaurants, Bars or Bakeries.
yelp_restaurants = yelp_business.filter(yelp_business.categories.rlike('Food|Restaurants|Bars|Bakeries'))
# Expose the subset to SQL cells. createOrReplaceTempView replaces the
# deprecated registerTempTable and registers the same temporary view name.
yelp_restaurants.createOrReplaceTempView('yelp_restaurants')
yelp_restaurants.count()
58588
%%sql
select * from yelp_restaurants LIMIT 10
| business_id | name | neighborhood | address | city | state | postal_code | latitude | longitude | stars | review_count | is_open | categories | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | PfOCPjBrlQAnz__NXj9h_w | """Brick House Tavern + Tap""" | NaN | """581 Howe Ave""" | Cuyahoga Falls | OH | 44221 | 41.119535 | -81.475690 | 3.5 | 116 | 1.0 | American (New);Nightlife;Bars;Sandwiches;Ameri... |
| 1 | o9eMRCWt5PkpLDE0gOPtcQ | """Messina""" | NaN | """Richterstr. 11""" | Stuttgart | BW | 70567 | 48.727200 | 9.147950 | 4.0 | 5 | 1.0 | Italian;Restaurants |
| 2 | EsMcGiZaQuG1OOvL9iUFug | """Any Given Sundae""" | NaN | """2612 Brandt School Rd""" | Wexford | PA | 15090 | 40.615102 | -80.091349 | 5.0 | 15 | 1.0 | Coffee & Tea;Ice Cream & Frozen Yogurt;Food |
| 3 | XOSRcvtaKc_Q5H1SAzN20A | """East Coast Coffee""" | NaN | """737 West Pike St""" | Houston | PA | 15342 | 40.241548 | -80.212815 | 4.5 | 3 | 0.0 | Breakfast & Brunch;Gluten-Free;Coffee & Tea;Fo... |
| 4 | xcgFnd-MwkZeO5G2HQ0gAQ | """T & T Bakery and Cafe""" | Markham Village | """35 Main Street N""" | Markham | ON | L3P 1X3 | 43.875177 | -79.260153 | 4.0 | 38 | 1.0 | Bakeries;Bagels;Food |
| 5 | fNMVV_ZX7CJSDWQGdOM8Nw | """Showmars Government Center""" | Uptown | """600 E 4th St""" | Charlotte | NC | 28202 | 35.221647 | -80.839345 | 3.5 | 7 | 1.0 | Restaurants;American (Traditional) |
| 6 | l09JfMeQ6ynYs5MCJtrcmQ | """Alize Catering""" | Yonge and Eglinton | """2459 Yonge St""" | Toronto | ON | M4P 2H6 | 43.711399 | -79.399339 | 3.0 | 12 | 0.0 | Italian;French;Restaurants |
| 7 | lHYiCS-y8AFjUitv6MGpxg | """Starbucks""" | Liberty Village | """85 Hanna Avenue""" | Toronto | ON | M6K 3S3 | 43.639863 | -79.419533 | 4.0 | 21 | 1.0 | Food;Coffee & Tea |
| 8 | VSGcuYDV3q-AAZ9ZPq4fBQ | """Sportster's""" | The Danforth | """1430 Danforth Avenue""" | Toronto | ON | M4J 1N4 | 43.682867 | -79.326964 | 2.5 | 7 | 1.0 | Bars;Sports Bars;Nightlife |
| 9 | 1K4qrnfyzKzGgJPBEcJaNQ | """Chula Taberna Mexicana""" | Leslieville | """1058 Gerrard Street E""" | Toronto | ON | M4M 3A6 | 43.669256 | -79.335902 | 3.5 | 39 | 1.0 | Tiki Bars;Nightlife;Mexican;Restaurants;Bars |
%%sql
/* What are the top 3 cities with the highest number of restaurants? */
select city, count(*) as N from yelp_restaurants group by city order by N DESC LIMIT 3
| city | N | |
|---|---|---|
| 0 | Toronto | 8525 |
| 1 | Las Vegas | 5425 |
| 2 | Montréal | 3957 |
# Reviews (text, id, date) of every restaurant located in Toronto.
subset_toronto = spark.sql("select text, review_id, date from yelp_review where business_id IN (select business_id from yelp_restaurants where city IN ('Toronto'))")
# createOrReplaceTempView replaces the deprecated registerTempTable.
subset_toronto.createOrReplaceTempView('subset_toronto')
# Number of reviews for Toronto
subset_toronto.count()
136201
# Reviews (text, id, date) of every restaurant located in Las Vegas.
subset_las_vegas = spark.sql("select text, review_id, date from yelp_review where business_id IN (select business_id from yelp_restaurants where city IN ('Las Vegas'))")
# createOrReplaceTempView replaces the deprecated registerTempTable.
subset_las_vegas.createOrReplaceTempView('subset_las_vegas')
# Number of reviews for Las Vegas
subset_las_vegas.count()
384761
# Reviews (text, id, date) of every restaurant located in Montréal.
subset_montreal = spark.sql("select text, review_id, date from yelp_review where business_id IN (select business_id from yelp_restaurants where city IN ('Montréal'))")
# createOrReplaceTempView replaces the deprecated registerTempTable.
subset_montreal.createOrReplaceTempView('subset_montreal')
# Number of reviews for Montreal
subset_montreal.count()
53412
%%sql
show tables
| database | tableName | isTemporary | |
|---|---|---|---|
| 0 | default | hivesampletable | False |
| 1 | default | yelp_business | False |
| 2 | default | yelp_review | False |
| 3 | subset_las_vegas | True | |
| 4 | subset_montreal | True | |
| 5 | subset_toronto | True | |
| 6 | yelp_restaurants | True |
# Peek at the first row of the Toronto review subset.
subset_toronto.head(1)
[Row(text='"Changed my rating from 2 stars to 1 star, due to the attitude of either the staff or friends of Joe\'s Buffet Palace. I don\'t know why 8 of them think my review is ""funny"" (are they trying to troll me?) but I was not amused by the bad food experience I had here. I hope they continue to make fake 5-star reviews of this place because that is way funnier to me. I will never step foot in Joe\'s Buffet Palace again and will tell everyone I know to avoid it."', review_id='Xwz2i64CI0SE5wOgG9QB-w', date=datetime.datetime(2012, 4, 13, 0, 0))]
import pandas as pd

# Cities to model, paired position-by-position with their review subsets.
cities = ['Toronto', 'Las Vegas', 'Montréal']
datasets = [subset_toronto, subset_las_vegas, subset_montreal]

# The tokenizer and the stop-word remover do not depend on the data, so they
# are built once and reused for every city. The pattern splits on whitespace
# and on the punctuation characters , . " ; ( ).
tokenizer = RegexTokenizer(inputCol='text', outputCol='tokenized', pattern=r'\s+|[,.";()]')
stopwords = StopWordsRemover(inputCol=tokenizer.getOutputCol(), outputCol='words')

# One dict per (city, topic, term). The pandas DataFrame is built once after
# the loop instead of appending row by row: DataFrame.append copies the whole
# frame on every call and no longer exists in pandas 2.x.
topic_rows = []
for city, dataset in zip(cities, datasets):
    featurizedData0 = tokenizer.transform(dataset)
    featurizedData1 = stopwords.transform(featurizedData0)

    # Term Frequency Vectorization (CountVectorizer), vocabulary capped at 2000 terms
    cv = CountVectorizer(inputCol="words", outputCol="rawFeatures", vocabSize=2000)
    cvmodel = cv.fit(featurizedData1)
    featurizedData = cvmodel.transform(featurizedData1)
    vocab = cvmodel.vocabulary

    # TFIDF
    idf = IDF(inputCol="rawFeatures", outputCol="features")
    idfModel = idf.fit(featurizedData)
    rescaledData = idfModel.transform(featurizedData)

    # Generate 10 topics for each city
    lda = LDA(k=10, seed=1234, optimizer='online', featuresCol='features')
    ldamodel = lda.fit(rescaledData)

    # Five terms per topic, returned as vocabulary indices plus weights.
    topicIndices = ldamodel.describeTopics(maxTermsPerTopic=5)

    # Map term indices back to vocabulary words and record one row per term.
    # Topics are numbered from 1 in the order describeTopics returns them.
    for topic_number, topic in enumerate(topicIndices.collect(), start=1):
        for term_index, weight in zip(topic['termIndices'], topic['termWeights']):
            topic_rows.append({
                'City': city,
                'Term': vocab[term_index],
                'Topic': topic_number,
                'Weight': weight,
            })

# Explicit column order so the frame is well-formed even when no rows exist.
cities_df = pd.DataFrame(topic_rows, columns=['City', 'Term', 'Topic', 'Weight'])

pd.options.display.max_rows = 300
print(cities_df)
City Term Topic Weight 0 Toronto pizza 1 0.016716 1 Toronto ramen 1 0.010416 2 Toronto eggs 1 0.008036 3 Toronto bacon 1 0.006985 4 Toronto fries 1 0.006122 5 Toronto coffee 2 0.012492 6 Toronto tea 2 0.011473 7 Toronto store 2 0.006716 8 Toronto get 2 0.005462 9 Toronto place 2 0.005394 10 Toronto food 3 0.006700 11 Toronto good 3 0.006615 12 Toronto place 3 0.006451 13 Toronto great 3 0.006296 14 Toronto breakfast 3 0.006066 15 Toronto great 4 0.010428 16 Toronto service 4 0.006970 17 Toronto food 4 0.006837 18 Toronto atmosphere 4 0.006018 19 Toronto wine 4 0.005877 20 Toronto minutes 5 0.008381 21 Toronto us 5 0.008012 22 Toronto order 5 0.006432 23 Toronto food 5 0.006317 24 Toronto time 5 0.006211 25 Toronto us 6 0.007896 26 Toronto food 6 0.006179 27 Toronto service 6 0.006043 28 Toronto customer 6 0.005761 29 Toronto asked 6 0.005628 30 Toronto chicken 7 0.013164 31 Toronto rice 7 0.008658 32 Toronto soup 7 0.008040 33 Toronto spicy 7 0.007311 34 Toronto fried 7 0.006963 35 Toronto cream 8 0.017626 36 Toronto ice 8 0.016885 37 Toronto chocolate 8 0.010074 38 Toronto cake 8 0.009935 39 Toronto sweet 8 0.006235 40 Toronto pho 9 0.011126 41 Toronto burger 9 0.008292 42 Toronto chicken 9 0.007624 43 Toronto beef 9 0.006002 44 Toronto pork 9 0.005612 45 Toronto place 10 0.007603 46 Toronto location 10 0.006967 47 Toronto bar 10 0.006590 48 Toronto great 10 0.005867 49 Toronto good 10 0.005848 50 Las Vegas sushi 1 0.011330 51 Las Vegas & 1 0.007256 52 Las Vegas steak 1 0.005983 53 Las Vegas salad 1 0.004997 54 Las Vegas good 1 0.004936 55 Las Vegas burgers 2 0.008214 56 Las Vegas dim 2 0.007210 57 Las Vegas sum 2 0.006725 58 Las Vegas burger 2 0.006477 59 Las Vegas good 2 0.006069 60 Las Vegas pizza 3 0.020863 61 Las Vegas always 3 0.010976 62 Las Vegas great 3 0.009376 63 Las Vegas friendly 3 0.008517 64 Las Vegas love 3 0.008352 65 Las Vegas great 4 0.009927 66 Las Vegas vegas 4 0.008146 67 Las Vegas food 4 0.007817 68 Las Vegas amazing 4 0.007642 69 Las Vegas 
service 4 0.007486 70 Las Vegas breakfast 5 0.008176 71 Las Vegas buffet 5 0.006748 72 Las Vegas eggs 5 0.006710 73 Las Vegas chocolate 5 0.005840 74 Las Vegas good 5 0.005676 75 Las Vegas lobster 6 0.010598 76 Las Vegas soup 6 0.010356 77 Las Vegas chicken 6 0.008993 78 Las Vegas rice 6 0.008741 79 Las Vegas ordered 6 0.007910 80 Las Vegas line 7 0.007525 81 Las Vegas drive 7 0.006749 82 Las Vegas wait 7 0.005878 83 Las Vegas get 7 0.005754 84 Las Vegas taco 7 0.005159 85 Las Vegas burger 8 0.014047 86 Las Vegas fries 8 0.010258 87 Las Vegas cheese 8 0.009573 88 Las Vegas chicken 8 0.008377 89 Las Vegas sauce 8 0.005758 90 Las Vegas us 9 0.011784 91 Las Vegas minutes 9 0.007803 92 Las Vegas order 9 0.007407 93 Las Vegas asked 9 0.007050 94 Las Vegas said 9 0.006904 95 Las Vegas room 10 0.007113 96 Las Vegas hotel 10 0.006595 97 Las Vegas bar 10 0.006340 98 Las Vegas strip 10 0.006204 99 Las Vegas place 10 0.006148 100 Montréal gras 1 0.015071 101 Montréal foie 1 0.014215 102 Montréal chocolate 1 0.012642 103 Montréal rich 1 0.006829 104 Montréal lobster 1 0.006413 105 Montréal de 2 0.031393 106 Montréal et 2 0.027222 107 Montréal le 2 0.024552 108 Montréal la 2 0.021022 109 Montréal un 2 0.019353 110 Montréal bagels 3 0.019748 111 Montréal bagel 3 0.017836 112 Montréal pho 3 0.017467 113 Montréal meat 3 0.014684 114 Montréal smoked 3 0.014048 115 Montréal us 4 0.011371 116 Montréal minutes 4 0.007184 117 Montréal food 4 0.006491 118 Montréal order 4 0.005953 119 Montréal asked 4 0.005918 120 Montréal good 5 0.006617 121 Montréal meat 5 0.005530 122 Montréal beef 5 0.005314 123 Montréal place 5 0.005286 124 Montréal montreal 5 0.005180 125 Montréal coffee 6 0.009671 126 Montréal great 6 0.008989 127 Montréal place 6 0.007131 128 Montréal good 6 0.006841 129 Montréal best 6 0.006645 130 Montréal great 7 0.008221 131 Montréal breakfast 7 0.007989 132 Montréal brunch 7 0.007599 133 Montréal food 7 0.006790 134 Montréal friendly 7 0.006211 135 Montréal tea 8 0.007554 
136 Montréal bun 8 0.007414 137 Montréal latte 8 0.007186 138 Montréal bar 8 0.007078 139 Montréal good 8 0.006722 140 Montréal burger 9 0.008815 141 Montréal sushi 9 0.007186 142 Montréal food 9 0.006334 143 Montréal chicken 9 0.006261 144 Montréal place 9 0.005796 145 Montréal chicken 10 0.008390 146 Montréal good 10 0.006735 147 Montréal poutine 10 0.006380 148 Montréal like 10 0.005741 149 Montréal fries 10 0.005347
# Create a new Spark DataFrame from the pandas topic table.
# Weight holds fractional LDA term weights, so it is declared as a double
# (the earlier IntegerType declaration did not match the data).
schema = StructType([
    StructField('City', StringType(), True),
    StructField('Term', StringType(), True),
    StructField('Topic', IntegerType(), True),
    StructField('Weight', DoubleType(), True),
])
# Convert all rows in a single createDataFrame call. Values are passed as
# plain Python tuples in schema order, with numpy scalars converted to
# int/float, so the result does not depend on positional unions of one-row
# frames or on how Row orders keyword fields.
cities_DF = spark.createDataFrame(
    [
        (r.City, r.Term, int(r.Topic), float(r.Weight))
        for r in cities_df.itertuples(index=False)
    ],
    schema,
)
cities_DF.head(1)
[Row(City='Toronto', Term='pizza', Topic=1, Weight=0.016716298745811146)]
# Persist the per-city topic terms as a table, replacing any previous copy,
# then report how many rows it holds.
cities_DF.write.mode("overwrite").saveAsTable("cities_DF")
cities_DF.count()
150
%%sql
show tables
| database | tableName | isTemporary | |
|---|---|---|---|
| 0 | default | cities_df | False |
| 1 | default | hivesampletable | False |
| 2 | default | yelp_business | False |
| 3 | default | yelp_review | False |
| 4 | subset_las_vegas | True | |
| 5 | subset_montreal | True | |
| 6 | subset_toronto | True | |
| 7 | yelp_restaurants | True |
# Subset the data from business table: only the category string, the business
# id and the business name are needed for topic modelling of categories.
BusinessData = yelp_business.select(yelp_business.categories, yelp_business.business_id, yelp_business.name)
# createOrReplaceTempView replaces the deprecated registerTempTable.
BusinessData.createOrReplaceTempView('BusinessData')
BusinessData.head(1)
[Row(categories="Hair Stylists;Hair Salons;Men's Hair Salons;Blow Dry/Out Services;Hair Extensions;Beauty & Spas", business_id='He-G7vWjzVUysIKrfNbPUQ', name='"""Stephen Szabo Salon"""')]
# Tokenize the category strings, splitting on whitespace and on the
# punctuation characters , . " ; ( ).
tokenizer = RegexTokenizer(inputCol='categories', outputCol='tokenized', pattern=r'\s+|[,.";()]')
featurizedData0 = tokenizer.transform(BusinessData)

# Remove stop words from the token lists.
stopwords = StopWordsRemover(inputCol='tokenized', outputCol='words')
featurizedData1 = stopwords.transform(featurizedData0)

# Term-frequency vectors, vocabulary capped at 1000 terms.
cv = CountVectorizer(inputCol="words", outputCol="rawFeatures", vocabSize=1000)
cvmodel = cv.fit(featurizedData1)
featurizedData = cvmodel.transform(featurizedData1)
vocab = cvmodel.vocabulary
vocab_broadcast = sc.broadcast(vocab)

# Rescale the raw counts with inverse document frequency (TF-IDF).
idf = IDF(inputCol="rawFeatures", outputCol="features")
idfModel = idf.fit(featurizedData)
rescaledData = idfModel.transform(featurizedData)

# Fit an online LDA model with 25 topics on the TF-IDF features.
lda = LDA(k=25, seed=1234, optimizer='online', featuresCol="features")
ldamodel = lda.fit(rescaledData)

# Report the model's fit on the same data it was trained on.
ll = ldamodel.logLikelihood(rescaledData)
lp = ldamodel.logPerplexity(rescaledData)
print("The lower bound on the log likelihood of the entire corpus: {}".format(ll))
print("The upper bound on perplexity: {}".format(lp))
The lower bound on the log likelihood of the entire corpus: -11556902.025592694 The upper bound on perplexity: 4.185029986601038
# Ask the fitted LDA model whether it is a distributed model.
ldamodel.isDistributed()
False
# Describe the fitted topics (term indices and term weights per topic) and
# display up to 25 rows, one per topic.
ldatopics = ldamodel.describeTopics()
ldatopics.show(25)
+-----+--------------------+--------------------+ |topic| termIndices| termWeights| +-----+--------------------+--------------------+ | 0|[51, 6, 58, 72, 9...|[0.11221426670643...| | 1|[20, 5, 37, 35, 4...|[0.07692465479914...| | 2|[16, 129, 48, 99,...|[0.06926009550134...| | 3|[28, 53, 119, 153...|[0.22136805439616...| | 4|[142, 148, 163, 1...|[0.06473304219797...| | 5|[43, 22, 105, 3, ...|[0.06831633650490...| | 6|[31, 34, 3, 114, ...|[0.13964217564008...| | 7|[113, 14, 140, 10...|[0.05955085365637...| | 8|[32, 41, 74, 13, ...|[0.08912716556999...| | 9|[17, 38, 39, 26, ...|[0.13455815777818...| | 10|[50, 73, 3, 71, 2...|[0.15113119770760...| | 11|[24, 25, 109, 201...|[0.13847020934105...| | 12|[3, 29, 40, 1, 33...|[0.11725222116549...| | 13|[68, 71, 15, 20, ...|[0.05596693038739...| | 14|[5, 2, 83, 133, 2...|[0.08818045054138...| | 15|[76, 52, 61, 102,...|[0.09700588055615...| | 16|[7, 8, 21, 23, 0,...|[0.12025573319319...| | 17|[10, 12, 2, 0, 93...|[0.14568510287689...| | 18|[47, 66, 75, 1, 1...|[0.11967617178241...| | 19|[56, 77, 146, 3, ...|[0.14899612570985...| | 20|[19, 22, 135, 91,...|[0.12408254685498...| | 21|[42, 46, 1, 139, ...|[0.12653463304892...| | 22|[18, 11, 16, 65, ...|[0.11524058411067...| | 23|[6, 9, 80, 86, 85...|[0.15309688013229...| | 24|[49, 30, 81, 55, ...|[0.11113403052155...| +-----+--------------------+--------------------+
# Five terms per topic, returned as vocabulary indices plus weights.
topicIndices = ldamodel.describeTopics(maxTermsPerTopic=5)
vocablist = cvmodel.vocabulary
topics_rdd = topicIndices.rdd

# Translate each topic's term indices into the corresponding vocabulary words.
topics_words = (
    topics_rdd
    .map(lambda row: row['termIndices'])
    .map(lambda idx_list: [vocablist[idx] for idx in idx_list])
    .collect()
)

# Print every topic's words, separated by a divider line.
for index, topic in enumerate(topics_words):
    print("topic: ", index)
    print(topic)
    print("------------------")
topic: 0 ['chinese', 'bars', 'cafes', 'pubs', 'nightlife'] ------------------ topic: 1 ['arts', 'home', 'garden', 'entertainment', 'shopping'] ------------------ topic: 2 ['repair', 'gas', 'stations', 'convenience', 'automotive'] ------------------ topic: 3 ['pet', 'pets', 'delis', 'groomers', 'sandwiches'] ------------------ topic: 4 ['pool', 'transportation', 'wash', 'detailing', 'storage'] ------------------ topic: 5 ['specialty', 'health', 'dentists', 'food', 'medical'] ------------------ topic: 6 ['tea', 'coffee', 'food', 'photography', '&'] ------------------ topic: 7 ['department', 'stores', 'air', 'drugstores', 'heating'] ------------------ topic: 8 ['pizza', 'italian', 'laundry', 'american', 'restaurants'] ------------------ topic: 9 ['hotels', 'estate', 'real', 'travel', 'services'] ------------------ topic: 10 ['grocery', 'seafood', 'food', 'schools', 'providers'] ------------------ topic: 11 ['life', 'active', 'parks', 'ethnic', 'specialty'] ------------------ topic: 12 ['food', 'fast', 'burgers', 'restaurants', 'sandwiches'] ------------------ topic: 13 ['education', 'schools', 'local', 'arts', 'entertainment'] ------------------ topic: 14 ['home', 'services', 'contractors', 'installation', 'bagels'] ------------------ topic: 15 ['beer', 'cleaning', 'wine', 'spirits', 'home'] ------------------ topic: 16 ['spas', 'beauty', 'hair', 'salons', '&'] ------------------ topic: 17 ['event', 'planning', 'services', '&', 'caterers'] ------------------ topic: 18 ['mexican', 'chicken', 'wings', 'restaurants', 'asian'] ------------------ topic: 19 ['bakeries', 'desserts', 'barbers', 'food', 'landscaping'] ------------------ topic: 20 ['medical', 'health', 'jewelry', 'doctors', 'centers'] ------------------ topic: 21 ['breakfast', 'brunch', 'restaurants', 'diners', 'american'] ------------------ topic: 22 ['auto', 'automotive', 'repair', 'dealers', 'tires'] ------------------ topic: 23 ['bars', 'nightlife', 'ice', 'yogurt', 'frozen'] ------------------ topic: 24 
['clothing', 'fashion', "women's", 'sports', 'shopping'] ------------------